Artificial neural networks (ANNs), also known as connectionist systems, are computing systems inspired by the biological neural networks of animal brains. ANNs are composed of artificial neurons. These neurons are linked to one another by connections. A given neuron can have multiple input and output connections.
Artificial neural network components:
There is also an element of (machine) learning. Roughly speaking, learning consists of two parts: forward propagation and backpropagation. Together with a cost function, the algorithm runs iteratively with the objective of decreasing the loss and increasing the accuracy.
In this article, we develop a PyTorch artificial neural network model. The number of layers can be adjusted, of course. There are a large number of resources on choosing the right number of hidden layers, such as [1]. Generally speaking, we do not recommend adding too many hidden layers. For a large number of problems, one hidden layer is sufficient.
![]()
A random n-class classification dataset can be generated using sklearn.datasets.make_classification. Here, we generate a dataset with two features and 1000 instances. The dataset is generated for binary classification, i.e. with two classes (n_classes = 2).
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from num2words import num2words
# Problem size: two features and two target classes.
n_features = 2
n_classes = 2

# Synthetic classification data; the sample count scales with the class count
# (1000 samples for the two-class case).
X, y = make_classification(
    n_samples=int((n_classes - 1) * 1e3),
    n_features=n_features,
    n_redundant=0,
    n_classes=n_classes,
    n_informative=2,
    random_state=1,
    n_clusters_per_class=1,
)

# Map every numeric class id found in y to its title-cased English name.
Labels_dict = {label: num2words(label).title() for label in np.unique(y)}

# Tabular view of the data: one column per feature plus the target column.
Data = pd.DataFrame(
    data=X,
    columns=['Feature %i' % (idx + 1) for idx in range(n_features)],
)
Target = 'Outcome Variable'
Data[Target] = y
display(Data)
from HD_DeepLearning import Plot_Data
# Plot options handed to Plot_Data (keys are defined by HD_DeepLearning).
PD = {
    'BP': 0.5,
    'alpha': 0.7,
    'bg_alpha': 0.25,
    'grid': True,
    'cricle_size': 50,
    'FigSize': 7,
    'h': 0.02,
    'pad': 1,
    'ColorMap': 'Set1',
    'Labels': list(Labels_dict.values()),
}
Plot_Data(X, y, PD=PD, Labels_dict=Labels_dict, ax=None)
# One pull value per class: 0.01 for every class except the last, which gets 0.1
# (presumably the per-slice offset of the pie chart -- confirm in DatasetTargetDist).
Pull = [.01] * (len(Labels_dict) - 1) + [.1]
import plotly.express as px
from HD_DeepLearning import DatasetTargetDist
# Figure options handed to DatasetTargetDist (keys are defined by HD_DeepLearning).
PD = {
    'PieColors': px.colors.sequential.Rainbow[0:-1:3],
    'TableColors': ['Navy', 'White'],
    'hole': .4,
    'row_heights': [0.35, 0.65],
    'textfont': 14,
    'height': 500,
    'tablecolumnwidth': [0.25, 0.15, 0.15],
    'pull': Pull,
    'legend_title': Target,
    'title_x': 0.5,
    'title_y': .9,
    'pie_legend': [0.1, 0.12],
}
del Pull
DatasetTargetDist(Data, Target, Labels_dict, PD)
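StratifiedShuffleSplit is a merge of StratifiedKFold and ShuffleSplit that returns stratified randomized folds: each set contains approximately the same percentage of samples of each target class as the complete set. Here, a single split is used to hold out 30% of the data as the test set.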
from sklearn.model_selection import StratifiedShuffleSplit
# Hold out a stratified test set: a single shuffled split that keeps the class
# proportions of y in both the train and the test part.
Test_Size = 0.3
sss = StratifiedShuffleSplit(n_splits=1, test_size=Test_Size, random_state=42)
_ = sss.get_n_splits(X, y)
for train_index, test_index in sss.split(X, y):
    # split() yields positional indices, so pandas containers must be indexed
    # with .iloc; label-based .loc / [] would select the wrong rows (or fail)
    # whenever the index is not the default 0..n-1 range.
    # X
    if isinstance(X, pd.DataFrame):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    else:
        X_train, X_test = X[train_index], X[test_index]
    # y
    if isinstance(y, pd.Series):
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    else:
        y_train, y_test = y[train_index], y[test_index]
del sss
from HD_DeepLearning import Train_Test_Dist
# Override the layout options that differ for the train/test distribution figure.
PD.update(
    column_widths=[0.3, 0.3, 0.3],
    tablecolumnwidth=[0.2, 0.4],
    height=550,
    legend_title=Target,
)
Train_Test_Dist(X_train, y_train, X_test, y_test, PD, Labels_dict)
A multi-layer perceptron (MLP) is a class of feedforward artificial neural network (ANN). At each iteration, the algorithm uses the Cross-Entropy Loss to measure the loss, and then the gradient is calculated and the model is updated. At the end of this iterative process, we would reach a better level of agreement between the true and predicted labels, since the error would be lower than that of the first step.
import torch
def TorchSets(Set):
    '''
    Convert a NumPy array or a pandas object into a torch tensor.

    One-dimensional input (e.g. a label vector) is cast to torch.long; input of
    any other rank keeps the dtype of the underlying NumPy array. The tensor is
    moved to the GPU when CUDA is available, otherwise it stays on the CPU.

    Input:  Set -- numpy.ndarray, pandas.DataFrame or pandas.Series
    Output: torch.Tensor
    '''
    # pandas containers are reduced to their underlying NumPy array first.
    if isinstance(Set, (pd.DataFrame, pd.Series)):
        Set = Set.values
    Out = torch.from_numpy(Set)
    if Set.ndim == 1:
        Out = Out.long()
    # GPU Cuda
    if torch.cuda.is_available():
        Out = Out.cuda()
    return Out
# Tensors
X_train_tensor = TorchSets(X_train)
y_train_tensor = TorchSets(y_train)
X_test_tensor = TorchSets(X_test)
y_test_tensor = TorchSets(y_test)

Batch_size = 100
iteration_number = int(1e4)
# Number of passes over the training set that yields roughly
# iteration_number mini-batch updates.
epochs_number = int(iteration_number / (len(X_train) / Batch_size))

# Pytorch train and test sets
Train_set = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
Test_set = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)

# data loader
train_loader = torch.utils.data.DataLoader(Train_set, batch_size=Batch_size, shuffle=False)
# The evaluation loader must iterate over the held-out test set; building it
# from Train_set would report training accuracy as test accuracy.
test_loader = torch.utils.data.DataLoader(Test_set, batch_size=Batch_size, shuffle=False)
class MLP_Model(torch.nn.Module):
    '''
    A MLP model with two hidden layers.

    Layout: input_Size -> hidden_Size -> int(hidden_Size/4) -> output_Size,
    with a ReLU after each hidden layer. The forward pass returns the raw
    class scores (logits) of shape (batch, output_Size); this is the input
    torch.nn.CrossEntropyLoss expects, since that loss applies log-softmax
    itself. Use torch.max / argmax on the output to obtain class predictions.
    '''
    def __init__(self, input_Size, hidden_Size, output_Size):
        super().__init__()
        reduced_Size = int(hidden_Size / 4)
        # Input Layer to the 1st Layer:
        self.fc1 = torch.nn.Linear(input_Size, hidden_Size)
        torch.nn.init.kaiming_uniform_(self.fc1.weight, nonlinearity='relu')
        self.act1 = torch.nn.ReLU()
        # 1st Layer to 2nd Layer
        self.fc2 = torch.nn.Linear(hidden_Size, reduced_Size)
        torch.nn.init.kaiming_uniform_(self.fc2.weight, nonlinearity='relu')
        self.act2 = torch.nn.ReLU()
        # 2nd layer to Output Layer
        self.fc3 = torch.nn.Linear(reduced_Size, output_Size)
        torch.nn.init.kaiming_uniform_(self.fc3.weight)
        # No squashing on the output: a Sigmoid here would confine the logits
        # to (0, 1) before CrossEntropyLoss and weaken the training signal.
        # Identity keeps the act3 attribute so the module layout is unchanged.
        self.act3 = torch.nn.Identity()

    def forward(self, x):
        '''Return the class logits for a batch x of shape (batch, input_Size).'''
        # Input Layer to the 1st Layer, then non-linearity 1
        out = self.act1(self.fc1(x))
        # 1st Layer to 2nd Layer, then non-linearity 2
        out = self.act2(self.fc2(out))
        # 2nd layer to Output Layer (raw logits)
        out = self.act3(self.fc3(out))
        return out
Fitting the model
input_Size, output_Size = n_features, len(Labels_dict)
hidden_Size = 256
# model
model = MLP_Model(input_Size, hidden_Size, output_Size)
# GPU
if torch.cuda.is_available():
    model.cuda()
# Cross Entropy Loss
criterion = torch.nn.CrossEntropyLoss()
# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=.9)

# Training the Model
Count = 0            # number of parameter updates performed so far
Loss_list = []
Iteration_list = []
Accuracy_list = []
MSE_list = []
MAE_list = []
Steps = 10           # evaluate on test_loader every `Steps` updates

import progressbar
Progress_Bar = progressbar.ProgressBar(maxval=iteration_number + 200,
                                       widgets=[progressbar.Bar('=', '|', '|'),
                                                progressbar.Percentage()])

for epoch in range(epochs_number):
    for i, (Xtr, ytr) in enumerate(train_loader):
        Xtr = Xtr.view(-1, n_features)
        # Set all gradients to zero
        optimizer.zero_grad()
        # Forward
        Out = model(Xtr.float())
        # loss
        loss = criterion(Out, ytr.long())
        # Backward (Calculating the gradients)
        loss.backward()
        # Update parameters
        optimizer.step()
        Count += 1
        del Xtr, ytr

        # Predictions
        if Count % Steps == 0:
            # Calculate Accuracy
            Correct, Total = 0, 0
            # Evaluation needs no gradients; no_grad avoids building the
            # autograd graph for every evaluation batch.
            with torch.no_grad():
                for Xts, yts in test_loader:
                    Xts = Xts.view(-1, n_features)
                    # Forward
                    Out = model(Xts.float())
                    # Index of the maximum score = predicted class
                    Predicted = torch.max(Out, 1)[1]
                    # Total number of yts
                    Total += len(yts)
                    # Total Correct predictions
                    Correct += (Predicted == yts).sum().item()
                    del Xts, yts
            # storing loss and iteration; the stored loss is the one of the
            # latest training mini-batch, kept as a plain Python float
            Loss_list.append(loss.item())
            Iteration_list.append(Count)
            Accuracy_list.append(Correct / float(Total))
        Progress_Bar.update(Count)
Progress_Bar.finish()

history = pd.DataFrame({'Iteration': np.array(Iteration_list),
                        'Loss': np.array(Loss_list),
                        'Accuracy': np.array(Accuracy_list)})
del Loss_list, Iteration_list, Accuracy_list
Model Performance
from HD_DeepLearning import Plot_history
# Layout and styling options handed to Plot_history (keys are defined by HD_DeepLearning).
PD = {
    'row_heights': [0.4, 0.6],
    'lw': 1.5,
    'font_size': 12,
    'height': 700,
    'yLim': 1,
    'th_line_color': 'Navy',
    'th_fill_color': 'darkslategray',
    'table_columnwidth': [0.4, 0.4, 0.4, 0.4],
    'tc_line_color': 'Navy',
    'tc_fill_color': None,
    'title_x': 0.46,
    'title_y': 0.92,
    'tb_cell_heigh': 20,
    'Number_Format': '%.4e',
}
Plot_history(history, PD, Title='Test Set', Colors=['DarkGreen', 'Red'])
from HD_DeepLearning import Plot_Classification_Torch
import matplotlib.pyplot as plt
# Plot options handed to Plot_Classification_Torch (keys are defined by HD_DeepLearning).
PD = {
    'BP': 0.5,
    'alpha': 0.7,
    'bg_alpha': 0.15,
    'grid': False,
    'cricle_size': 50,
    'FigSize': 7,
    'h': 0.02,
    'pad': 1,
    'ColorMap': 'bwr',
    'Labels': list(Labels_dict.values()),
}
fig, ax = plt.subplots(1, 2, figsize=(16, 7))
# Left panel: train set, right panel: test set.
for panel, panel_X, panel_y, panel_title in (
    (ax[0], X_train, y_train, 'Train Set'),
    (ax[1], X_test, y_test, 'Test Set'),
):
    Plot_Classification_Torch(model, panel_X, panel_y, PD=PD, ax=panel)
    _ = panel.set_title(panel_title, fontsize=16, weight='bold')
The confusion matrix allows for visualization of the performance of an algorithm. Note that due to the size of data, here we don't provide a Cross-validation evaluation. In general, this type of evaluation is preferred.
from sklearn import metrics
def _predict_labels(features):
    '''Return the model's predicted class index for each row of `features` as a NumPy array.'''
    # Inference only: gradients are not needed, so skip the autograd graph.
    with torch.no_grad():
        scores = model(features.float())
    return torch.max(scores, 1)[1].cpu().numpy()


# Train
y_pred = _predict_labels(X_train_tensor)
Reports_Train = pd.DataFrame(metrics.classification_report(y_train, y_pred,
                                                           target_names=list(Labels_dict.values()),
                                                           output_dict=True)).T
CM_Train = metrics.confusion_matrix(y_train, y_pred)
# Test
y_pred = _predict_labels(X_test_tensor)
Reports_Test = pd.DataFrame(metrics.classification_report(y_test, y_pred,
                                                          target_names=list(Labels_dict.values()),
                                                          output_dict=True)).T
CM_Test = metrics.confusion_matrix(y_test, y_pred)
# Turn the metric names (the index) into a regular first column named after the set.
Reports_Train = Reports_Train.reset_index().rename(columns={'index': 'Train Set'})
Reports_Test = Reports_Test.reset_index().rename(columns={'index': 'Test Set'})

# Train report: light green body, dark green label column.
display(
    Reports_Train.style.hide(axis='index')
    .set_properties(**{'background-color': 'HoneyDew', 'color': 'Black'})
    .set_properties(subset=['Train Set'], **{'background-color': 'SeaGreen', 'color': 'White'})
)
# Test report: light blue body, dark blue label column.
display(
    Reports_Test.style.hide(axis='index')
    .set_properties(**{'background-color': 'Azure', 'color': 'Black'})
    .set_properties(subset=['Test Set'], **{'background-color': 'RoyalBlue', 'color': 'White'})
)
from HD_DeepLearning import Confusion_Mat
# Figure options handed to Confusion_Mat (keys are defined by HD_DeepLearning).
PD = {
    'FS': (10, 5),
    'annot_kws': 14,
    'shrink': .6,
    'Labels': list(Labels_dict.values()),
}
Confusion_Mat(CM_Train, CM_Test, PD=PD, n_splits=None)